mwait-idle: prevent SKL-H boot failure when C8+C9+C10 enabled
authorLen Brown <len.brown@intel.com>
Fri, 8 Apr 2016 20:31:47 +0000 (22:31 +0200)
committerJan Beulich <jbeulich@suse.com>
Fri, 8 Apr 2016 20:31:47 +0000 (22:31 +0200)
Some SKL-H configurations require "max_cstate=7" to boot.
While that is an effective workaround, it disables C10.

This patch detects the problematic configuration,
and disables C8 and C9, keeping C10 enabled.

Note that enabling SGX in BIOS SETUP can also prevent this issue,
if the system BIOS provides that option.

https://bugzilla.kernel.org/show_bug.cgi?id=109081
"Freezes with Intel i7 6700HQ (Skylake), unless intel_idle.max_cstate=7"

Signed-off-by: Len Brown <len.brown@intel.com>
[Linux commit: d70e28f57e14a481977436695b0c9ba165472431]

Adjust to Xen infrastructure.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Andrew Cooper <andrew.cooper3@citrix.com>
xen/arch/x86/cpu/mwait-idle.c
xen/include/asm-x86/msr-index.h
xen/include/public/arch-x86/cpufeatureset.h

index 0fcc130099ece20035e052ef390100a87c230e8f..9e193155caa18befdccbde7f06fe67c8a1ddb87f 100644 (file)
@@ -60,7 +60,7 @@
 #include <asm/msr.h>
 #include <acpi/cpufreq/cpufreq.h>
 
-#define MWAIT_IDLE_VERSION "0.4"
+#define MWAIT_IDLE_VERSION "0.4.1"
 #undef PREFIX
 #define PREFIX "mwait-idle: "
 
@@ -100,6 +100,7 @@ static const struct cpuidle_state {
        unsigned int    target_residency; /* in US */
 } *cpuidle_state_table;
 
+#define CPUIDLE_FLAG_DISABLED          0x1
 /*
  * Set this flag for states where the HW flushes the TLB for us
  * and so we don't need cross-calls to keep it consistent.
@@ -477,7 +478,7 @@ static const struct cpuidle_state bdw_cstates[] = {
        {}
 };
 
-static const struct cpuidle_state skl_cstates[] = {
+static struct cpuidle_state skl_cstates[] = {
        {
                .name = "C1-SKL",
                .flags = MWAIT2flg(0x00),
@@ -781,36 +782,86 @@ static const struct x86_cpu_id intel_idle_ids[] __initconst = {
 };
 
 /*
- * mwait_idle_state_table_update()
- *
- * Update the default state_table for this CPU-id
+ * ivt_idle_state_table_update(void)
  *
- * Currently used to access tuned IVT multi-socket targets
+ * Tune IVT multi-socket targets
  * Assumption: num_sockets == (max_package_num + 1)
  */
-static void __init mwait_idle_state_table_update(void)
+static void __init ivt_idle_state_table_update(void)
 {
        /* IVT uses a different table for 1-2, 3-4, and > 4 sockets */
-       if (boot_cpu_data.x86_model == 0x3e) { /* IVT */
-               unsigned int cpu, max_apicid = boot_cpu_physical_apicid;
-
-               for_each_present_cpu(cpu)
-                       if (max_apicid < x86_cpu_to_apicid[cpu])
-                               max_apicid = x86_cpu_to_apicid[cpu];
-               switch (apicid_to_socket(max_apicid)) {
-               case 0: case 1:
-                       /* 1 and 2 socket systems use default ivt_cstates */
-                       break;
-               case 2: case 3:
-                       cpuidle_state_table = ivt_cstates_4s;
-                       break;
-               default:
-                       cpuidle_state_table = ivt_cstates_8s;
-                       break;
-               }
+       unsigned int cpu, max_apicid = boot_cpu_physical_apicid;
+
+       for_each_present_cpu(cpu)
+               if (max_apicid < x86_cpu_to_apicid[cpu])
+                       max_apicid = x86_cpu_to_apicid[cpu];
+       switch (apicid_to_socket(max_apicid)) {
+       case 0: case 1:
+               /* 1 and 2 socket systems use default ivt_cstates */
+               break;
+       case 2: case 3:
+               cpuidle_state_table = ivt_cstates_4s;
+               break;
+       default:
+               cpuidle_state_table = ivt_cstates_8s;
+               break;
        }
 }
 
+/*
+ * sklh_idle_state_table_update(void)
+ *
+ * On SKL-H (model 0x5e) disable C8 and C9 if:
+ * C10 is enabled and SGX disabled
+ */
+static void sklh_idle_state_table_update(void)
+{
+       u64 msr;
+
+       /* if PC10 disabled via cmdline max_cstate=7 or shallower */
+       if (max_cstate <= 7)
+               return;
+
+       /* if PC10 not present in CPUID.MWAIT.EDX */
+       if ((mwait_substates & (MWAIT_CSTATE_MASK << 28)) == 0)
+               return;
+
+       rdmsrl(MSR_NHM_SNB_PKG_CST_CFG_CTL, msr);
+
+       /* PC10 is not enabled in PKG C-state limit */
+       if ((msr & 0xF) != 8)
+               return;
+
+       /* if SGX is present */
+       if (boot_cpu_has(X86_FEATURE_SGX)) {
+               rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
+
+               /* if SGX is enabled */
+               if (msr & (1 << 18))
+                       return;
+       }
+
+       skl_cstates[5].flags |= CPUIDLE_FLAG_DISABLED;  /* C8-SKL */
+       skl_cstates[6].flags |= CPUIDLE_FLAG_DISABLED;  /* C9-SKL */
+}
+
+/*
+ * mwait_idle_state_table_update()
+ *
+ * Update the default state_table for this CPU-id
+ */
+static void __init mwait_idle_state_table_update(void)
+{
+       switch (boot_cpu_data.x86_model) {
+       case 0x3e: /* IVT */
+               ivt_idle_state_table_update();
+               break;
+       case 0x5e: /* SKL-H */
+               sklh_idle_state_table_update();
+               break;
+       }
+}
+
 static int __init mwait_idle_probe(void)
 {
        unsigned int eax, ebx, ecx;
@@ -897,6 +948,14 @@ static int mwait_idle_cpu_init(struct notifier_block *nfb,
                if (num_substates == 0)
                        continue;
 
+               /* if state marked as disabled, skip it */
+               if (cpuidle_state_table[cstate].flags &
+                   CPUIDLE_FLAG_DISABLED) {
+                       printk(XENLOG_DEBUG PREFIX "state %s is disabled",
+                              cpuidle_state_table[cstate].name);
+                       continue;
+               }
+
                if (dev->count >= ACPI_PROCESSOR_MAX_POWER) {
                        printk(PREFIX "max C-state count of %u reached\n",
                               ACPI_PROCESSOR_MAX_POWER);
index 84da631ad082bab8d9a6ec9c33a5889d3a6f85cb..aaf2d4ce2b40b1ff121127f9f1f37d14192eee3d 100644 (file)
 #define MSR_IA32_PLATFORM_ID           0x00000017
 #define MSR_IA32_EBL_CR_POWERON                0x0000002a
 #define MSR_IA32_EBC_FREQUENCY_ID      0x0000002c
+#define MSR_IA32_FEATURE_CONTROL       0x0000003a
 #define MSR_IA32_TSC_ADJUST            0x0000003b
 
 #define MSR_IA32_APICBASE              0x0000001b
index edd99751c6983f27daf2ce2e44b79fe88f702eef..b506d6c97cba5637a0678b07b44123d1491462e7 100644 (file)
@@ -192,6 +192,7 @@ XEN_CPUFEATURE(XSAVES,        4*32+ 3) /*S  XSAVES/XRSTORS instructions */
 /* Intel-defined CPU features, CPUID level 0x00000007:0.ebx, word 5 */
 XEN_CPUFEATURE(FSGSBASE,      5*32+ 0) /*A  {RD,WR}{FS,GS}BASE instructions */
 XEN_CPUFEATURE(TSC_ADJUST,    5*32+ 1) /*S  TSC_ADJUST MSR available */
+XEN_CPUFEATURE(SGX,           5*32+ 2) /*   Software Guard extensions */
 XEN_CPUFEATURE(BMI1,          5*32+ 3) /*A  1st bit manipulation extensions */
 XEN_CPUFEATURE(HLE,           5*32+ 4) /*A  Hardware Lock Elision */
 XEN_CPUFEATURE(AVX2,          5*32+ 5) /*A  AVX2 instructions */